Replicating pipeline and results described in Galbraith et al. (2016). PNAS. https://www.pnas.org/content/113/4/1020



Overview of pipeline



Requirements

if(suppressWarnings(require("purrr"))==F){
  install.packages("purrr")
  library(purrr)
}

if(suppressWarnings(require("tidyverse"))==F){
  install.packages("tidyverse")
  library(tidyverse)
}

if(suppressWarnings(require("stringr"))==F){
  install.packages("stringr")
  library(tidyverse)
}

if(suppressWarnings(require("plyr"))==F){
  install.packages("plyr")
  library(plyr)
}

if(suppressWarnings(require("ggplot2"))==F){
  install.packages("ggplot2")
  library(ggplot2)
}

if(suppressWarnings(require("Rfast"))==F){
  install.packages("Rfast")
  library(Rfast)
}

if(suppressWarnings(require("tryCatchLog"))==F){
  install.packages("tryCatchLog")
  library(tryCatchLog)
}

if(suppressWarnings(require("lmerTest"))==F){
  install.packages("lmerTest")
  library(lmerTest)
}

if(suppressWarnings(require("car"))==F){
  install.packages("car")
  library(car)
}

if(suppressWarnings(require("viridis"))==F){
  install.packages("viridis")
  library(viridis)
}

if(suppressWarnings(require("gridExtra"))==F){
  install.packages("gridExtra")
  library(gridExtra)
}

if(suppressWarnings(require("kableExtra"))==F){
  install.packages("kableExtra")
  library(kableExtra)
}



Setup

Sample metadata

metadata <- read.csv("metadata.csv")
kbl(metadata) %>% kable_styling()
sample.id parent block cross phenotype individual lineage
X8754AQ Q 1 B reproductive X8754 EHB
X875cAQ Q 1 B reproductive X875c EHB
X875gAQ Q 1 B reproductive X875g EHB
X8820AQ Q 2 B reproductive X8820 EHB
X882cAQ Q 2 B reproductive X882c EHB
X882pAQ Q 2 B reproductive X882p EHB
X888aAQ Q 1 A reproductive X888a AHB
X888hAQ Q 1 A reproductive X888h AHB
X888jAQ Q 1 A reproductive X888j AHB
X894bAQ Q 2 A reproductive X894b AHB
X894qAQ Q 2 A reproductive X894q AHB
X894rAQ Q 2 A reproductive X894r AHB
X8751IQ Q 1 B sterile X8751 EHB
X8756IQ Q 1 B sterile X8756 EHB
X875aIQ Q 1 B sterile X875a EHB
X8827IQ Q 2 B sterile X8827 EHB
X882gIQ Q 2 B sterile X882g EHB
X882kIQ Q 2 B sterile X882k EHB
X882mIQ Q 2 B sterile X882m EHB
X888eIQ Q 1 A sterile X888e AHB
X888qIQ Q 1 A sterile X888q AHB
X888uIQ Q 1 A sterile X888u AHB
X894cIQ Q 2 A sterile X894c AHB
X894fIQ Q 2 A sterile X894f AHB
X894iIQ Q 2 A sterile X894i AHB
X8754AD D 1 B reproductive X8754 EHB
X875cAD D 1 B reproductive X875c EHB
X875gAD D 1 B reproductive X875g EHB
X8820AD D 2 B reproductive X8820 EHB
X882cAD D 2 B reproductive X882c EHB
X882pAD D 2 B reproductive X882p EHB
X888aAD D 1 A reproductive X888a AHB
X888hAD D 1 A reproductive X888h AHB
X888jAD D 1 A reproductive X888j AHB
X894bAD D 2 A reproductive X894b AHB
X894qAD D 2 A reproductive X894q AHB
X894rAD D 2 A reproductive X894r AHB
X8751ID D 1 B sterile X8751 EHB
X8756ID D 1 B sterile X8756 EHB
X875aID D 1 B sterile X875a EHB
X8827ID D 2 B sterile X8827 EHB
X882gID D 2 B sterile X882g EHB
X882kID D 2 B sterile X882k EHB
X882mID D 2 B sterile X882m EHB
X888eID D 1 A sterile X888e AHB
X888qID D 1 A sterile X888q AHB
X888uID D 1 A sterile X888u AHB
X894cID D 2 A sterile X894c AHB
X894fID D 2 A sterile X894f AHB
X894iID D 2 A sterile X894i AHB

Process counts

sterile_counts <- read.csv("sterile_counts.csv")
sterile_counts <- sterile_counts[!sterile_counts$ID==".",]
sterile_counts$SNP.gene <- paste(paste(sterile_counts$Chrm,
                                            sterile_counts$SNP.pos,sep="|"),
                                      sterile_counts$ID,sep=":")
row.names(sterile_counts) <- sterile_counts$SNP.gene
sterile_counts$SNP.gene <- NULL
sterile_counts$Chrm <- NULL
sterile_counts$SNP.pos <- NULL
sterile_counts$ID <- NULL

reproductive_counts <- read.csv("reproductive_counts.csv")
reproductive_counts <- reproductive_counts[!reproductive_counts$ID==".",]
reproductive_counts$SNP.gene <- paste(paste(reproductive_counts$Chrm,
                                            reproductive_counts$SNP.pos,sep="|"),
                                      reproductive_counts$ID,sep=":")
row.names(reproductive_counts) <- reproductive_counts$SNP.gene
reproductive_counts$SNP.gene <- NULL
reproductive_counts$Chrm <- NULL
reproductive_counts$SNP.pos <- NULL
reproductive_counts$ID <- NULL

Filter low-count SNPs

  1. Remove rows with 0 counts by cross
  2. Remove rows with > 10000 read counts (cannot run SK test on these)
  3. Remove genes with < 2 SNPs
lcf <- 1

### Remove rows with <lcf counts by cross
L875u <- names(sterile_counts)%in%metadata[metadata$block==1&metadata$cross=="B","sample.id"]
L888u <- names(sterile_counts)%in%metadata[metadata$block==1&metadata$cross=="A","sample.id"]
L882u <- names(sterile_counts)%in%metadata[metadata$block==2&metadata$cross=="B","sample.id"]
L894u <- names(sterile_counts)%in%metadata[metadata$block==2&metadata$cross=="A","sample.id"]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L875u])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L888u])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L882u])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L894u])>lcf,]
rm(L875u,L888u,L882u,L894u)

L875r <- names(reproductive_counts)%in%metadata[metadata$block==1&metadata$cross=="B","sample.id"]
L888r <- names(reproductive_counts)%in%metadata[metadata$block==1&metadata$cross=="A","sample.id"]
L882r <- names(reproductive_counts)%in%metadata[metadata$block==2&metadata$cross=="B","sample.id"]
L894r <- names(reproductive_counts)%in%metadata[metadata$block==2&metadata$cross=="A","sample.id"]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L875r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L888r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L882r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L894r])>lcf,]
rm(L875r,L888r,L882r,L894r)

### Remove rows with greater than 10000 counts (cannot run SK test on these)
sterile_counts$SUM <- rowSums(sterile_counts)
sterile_counts <- sterile_counts[sterile_counts$SUM<10000,]
sterile_counts$SUM <- NULL

reproductive_counts$SUM <- rowSums(reproductive_counts)
reproductive_counts <- reproductive_counts[reproductive_counts$SUM<10000,]
reproductive_counts$SUM <- NULL

Split count matrices by parent of origin for testing

sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("D")&
                                         metadata$phenotype=="sterile","sample.id"]]
sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("Q")&
                                         metadata$phenotype=="sterile","sample.id"]]

reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("D")&
                                     metadata$phenotype=="reproductive","sample.id"]]
reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("Q")&
                                     metadata$phenotype=="reproductive","sample.id"]]



Functions for statistical tests

Storer-Kim test [twobinom]

(Function from the WRS2 package). Test the hypothesis that two independent binomials have equal probability of success using the Storer–Kim method.

twobinom<-function(r1=sum(elimna(x)),n1=length(x),
                   r2=sum(elimna(y)),n2=length(y),
                   x=NA,y=NA,alpha=.05){
  # Modified for efficiency: changed outer() command to Rfast::Outer()
  n1p<-n1+1
  n2p<-n2+1
  n1m<-n1-1
  n2m<-n2-1
  q <- r1/n1
  p <- r2/n2
  if(is.na(q)){q <- 0}
  if(is.na(p)){p <- 0}
  chk<-abs(q-p)
  x<-c(0:n1)/n1
  suppressWarnings(if(is.na(x)){x <- 0})
  y<-c(0:n2)/n2
  suppressWarnings(if(is.na(y)){y <- 0})
  phat<-(r1+r2)/(n1+n2)
  m1<-t(Outer(x,y,"-"))
  m2<-matrix(1,n1p,n2p)
  flag<-(abs(m1)>=chk)
  m3<-m2*flag
  rm(m1,m2,flag)
  xv<-c(1:n1)
  yv<-c(1:n2)
  xv1<-n1-xv+1
  yv1<-n2-yv+1
  dis1<-c(1,pbeta(phat,xv,xv1))
  dis2<-c(1,pbeta(phat,yv,yv1))
  pd1<-NA
  pd2<-NA
  for(i in 1:n1){pd1[i]<-dis1[i]-dis1[i+1]}
  for(i in 1:n2){pd2[i]<-dis2[i]-dis2[i+1]}
  pd1[n1p]<-phat^n1
  pd2[n2p]<-phat^n2
  m4<-t(Outer(pd1,pd2,"*"))
  test<-sum(m3*m4)
  rm(m3,m4)
  list(p.value=test,p1=q,p2=p,est.dif=q-p)
}

Wrapper for Storer-Kim test [test.df]

test.df <- function(pat.exp,mat.exp){
  i.len <- length(row.names(pat.exp))
  return.list <- list()
  for(i in 1:i.len){
    print(paste("Row",i,"of",i.len,sep=" "))
    SNP_gene <- row.names(pat.exp[i,])
    p1.s <- sum(pat.exp[i,])
    p2.s <- sum(mat.exp[i,])
    p.o <- sum(p1.s,p2.s)

    test <- twobinom(r1=p1.s,n1=p.o,r2=p2.s,n2=p.o)$p.value
    
    return.df <- data.frame(SNP_gene=SNP_gene,p=test)
    return.list[[i]] <- return.df
    cat("\014") 
  }
  return(bind_rows(return.list))
}

General linear mixed effects model [GLIMMIX]

GLIMMIX <- function(counts,metadata){
  # Requires tryCatchLog package!
  counts$SNP_gene <- row.names(counts)
  parent.list <- list()
  parent.p.list <- list()
  cross.list <- list()
  cross.p.list <- list()
  counts$geneID <- as.character(unlist(map(strsplit(counts$SNP_gene, split = ":"), 2)))
  genelist <- unique(counts$geneID)
  for(i in 1:length(genelist)){
    cat("\014") 
    print(paste("Row ",i," of ",length(genelist),sep=""))
    counts.sub <- counts[counts$geneID==genelist[i],]
    counts.sub$geneID <- NULL
    counts.sub <- gather(counts.sub, sample.id, count, 
                         names(counts.sub), -SNP_gene, factor_key=TRUE)
    counts.sub <- join(counts.sub, metadata, by = "sample.id")
    counts.sub$parent <- as.factor(str_sub(counts.sub$parent,-1,-1))
    counts.sub$SNP_gene <- as.factor(counts.sub$SNP_gene)
    counts.sub$lineage <- as.factor(counts.sub$lineage)
    counts.sub$individual <- as.factor(counts.sub$individual)
    
    testfail <- F
    test <- "null"
    
    if(length(levels(counts.sub$SNP_gene))>1){
     tryCatchLog(test <- lmer(count~parent+lineage+parent*lineage+(1|SNP_gene)+(1|individual),
                               data=counts.sub),
                 error = function(e) {testfail <- T})
    }else{
     tryCatchLog(test <- lmer(count~parent+lineage+parent*lineage+(1|individual),
                               data=counts.sub),
                 error = function(e) {testfail <- T})
    }
    
    if(class(test)=="character"){testfail <- T}
    
    if(testfail==F){
      test <- summary(test)
      parent.list[i] <- test[["coefficients"]][2,1]
      parent.p.list[i] <- test[["coefficients"]][2,5]
      cross.list[i] <- test[["coefficients"]][3,1]
      cross.p.list[i] <- test[["coefficients"]][3,5]
    }else{
      parent.list[i] <- 0
      parent.p.list[i] <- 1
      cross.list[i] <- 0
      cross.p.list[i] <- 1
    }
  }
  return(data.frame(ID=genelist,
                    parent.est=unlist(parent.list),
                    parent.p=unlist(parent.p.list),
                    cross.est=unlist(cross.list),
                    cross.p=unlist(cross.p.list)))
}



Conduct statistical tests

sterile.SK <- test.df(sterile.pat,sterile.mat)
write.csv(sterile.SK,"sterileSK.csv", row.names=F)

reproductive.SK <- test.df(reproductive.pat,reproductive.mat)
write.csv(reproductive.SK,"reproductiveSK.csv", row.names=F)
sterile.GLIMMIX <- GLIMMIX(sterile_counts,metadata)
write.csv(sterile.GLIMMIX,"sterileGLIMMIX.csv", row.names=F)
reproductive.GLIMMIX <- GLIMMIX(reproductive_counts,metadata)
write.csv(reproductive.GLIMMIX,"reproductiveGLIMMIX.csv", row.names=F)



Split count matrices by cross and parent of origin for plotting

p pat/mat cross parent
p1 pat B D
p1 mat B Q
p2 pat A D
p2 mat A Q
# sterile
## p1
p1.sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("D")&
                                                     metadata$cross=="B"&
                                                     metadata$phenotype=="sterile","sample.id"]]
p1.sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("Q")&
                                                     metadata$cross=="B"&
                                                     metadata$phenotype=="sterile","sample.id"]]
## p2
p2.sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("D")&
                                                     metadata$cross=="A"&
                                                     metadata$phenotype=="sterile","sample.id"]]
p2.sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("Q")&
                                                     metadata$cross=="A"&
                                                     metadata$phenotype=="sterile","sample.id"]]

# reproductive
## p1
p1.reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("D")&
                                                 metadata$cross=="B"&
                                                 metadata$phenotype=="reproductive","sample.id"]]
p1.reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("Q")&
                                                 metadata$cross=="B"&
                                                 metadata$phenotype=="reproductive","sample.id"]]
## p2
p2.reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("D")&
                                                 metadata$cross=="A"&
                                                 metadata$phenotype=="reproductive","sample.id"]]
p2.reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("Q")&
                                                 metadata$cross=="A"&
                                                 metadata$phenotype=="reproductive","sample.id"]]



Assess test results for each gene

sterile F1s

Set up a data.frame to plot %p1 and %p2 for each SNP

p1.sterile.plot <- data.frame(rowSums(p1.sterile.pat)/
                                  (rowSums(p1.sterile.mat)+
                                   rowSums(p1.sterile.pat)))
names(p1.sterile.plot) <- c("p1")
p1.sterile.plot[is.nan(p1.sterile.plot$p1),"p1"] <- 0

p2.sterile.plot <- data.frame(rowSums(p2.sterile.mat)/
                                  (rowSums(p2.sterile.mat)+
                                   rowSums(p2.sterile.pat)))
names(p2.sterile.plot) <- c("p2")
p2.sterile.plot[is.nan(p2.sterile.plot$p2),"p2"] <- 0

Join results of Storer-Kim tests

sterile.plot <- cbind(p1.sterile.plot,p2.sterile.plot)
sterile.plot <- sterile.plot[row.names(sterile.plot)%in%sterile.SK$SNP_gene,]
sterile.plot$SK.p <- sterile.SK$p
sterile.plot$SNP_gene <- row.names(sterile.plot)
sterile.plot$gene <- as.character(map(strsplit(sterile.plot$SNP_gene, split = ":"), 2))

reproductive F1s

Set up a data.frame to plot %p1 and %p2 for each SNP

p1.reproductive.plot <- data.frame(rowSums(p1.reproductive.pat)/
                                (rowSums(p1.reproductive.mat)+
                                 rowSums(p1.reproductive.pat)))
names(p1.reproductive.plot) <- c("p1")
p1.reproductive.plot[is.nan(p1.reproductive.plot$p1),"p1"] <- 0

p2.reproductive.plot <- data.frame(rowSums(p2.reproductive.mat)/
                                (rowSums(p2.reproductive.mat)+
                                 rowSums(p2.reproductive.pat)))
names(p2.reproductive.plot) <- c("p2")
p2.reproductive.plot[is.nan(p2.reproductive.plot$p2),"p2"] <- 0

Join results of Storer-Kim and GLIMMIX tests

reproductive.plot <- cbind(p1.reproductive.plot,p2.reproductive.plot)
reproductive.plot <- reproductive.plot[row.names(reproductive.plot)%in%reproductive.SK$SNP_gene,]
reproductive.plot$SK.p <- reproductive.SK$p
reproductive.plot$SNP_gene <- row.names(reproductive.plot)
reproductive.plot$gene <- as.character(map(strsplit(reproductive.plot$SNP_gene, split = ":"), 2))



Prep GLIMMIX results for downstream analyses

sterile.GLIMMIX.biased <- data.frame(gene=sterile.GLIMMIX$ID,
                                          parent.p=sterile.GLIMMIX$parent.p,
                                          cross.p=sterile.GLIMMIX$cross.p)

reproductive.GLIMMIX.biased <- data.frame(gene=reproductive.GLIMMIX$ID,
                                        parent.p=reproductive.GLIMMIX$parent.p,
                                        cross.p=reproductive.GLIMMIX$cross.p)



Correct for multiple testing

sterile.plot$SK.padj <- p.adjust(sterile.plot$SK.p,"BH")
sterile.plot$bias <- "NA"
reproductive.plot$SK.padj <- p.adjust(reproductive.plot$SK.p,"BH")
reproductive.plot$bias <- "NA"

sterile.GLIMMIX$parent.padj <- p.adjust(sterile.GLIMMIX$parent.p,"BH")
sterile.GLIMMIX$cross.padj <- p.adjust(sterile.GLIMMIX$cross.p,"BH")
reproductive.GLIMMIX$parent.padj <- p.adjust(reproductive.GLIMMIX$parent.p,"BH")
reproductive.GLIMMIX$cross.padj <- p.adjust(reproductive.GLIMMIX$cross.p,"BH")

sterile.GLIMMIX.biased <- sterile.GLIMMIX[sterile.GLIMMIX$parent.padj<0.05|sterile.GLIMMIX$cross.padj<0.05,1]
reproductive.GLIMMIX.biased <- reproductive.GLIMMIX[reproductive.GLIMMIX$parent.padj<0.05|reproductive.GLIMMIX$cross.padj<0.05,1]



For each gene, check whether all SNPs are biased in the same direction, then plot results

sterile F1s

for(i in 1:length(row.names(sterile.plot))){
  p <- sterile.plot[i,"SK.padj"]
  p1 <- sterile.plot[i,"p1"]
  p2 <- sterile.plot[i,"p2"]
  if(p<0.05&p1>0.6&p2<0.4){sterile.plot[i,"bias"] <- "pat"}
  if(p<0.05&p1<0.4&p2>0.6){sterile.plot[i,"bias"] <- "mat"}
  if(p<0.05&p1<0.4&p2<0.4){sterile.plot[i,"bias"] <- "EHB"}
  if(p<0.05&p1>0.6&p2>0.6){sterile.plot[i,"bias"] <- "AHB"}
}

biaslist <- data.frame(matrix(ncol=2,nrow=0))
names(biaslist) <- c("gene","bias")
genelist <- unique(sterile.plot$gene)

for(i in 1:length(genelist)){
  tmp <- unique(sterile.plot[sterile.plot$gene==genelist[i],"bias"])
  if(length(tmp)>1){
    if(length(tmp)==2){
      if(any(tmp%in%"NA")){
        bias <- tmp[!tmp%in%"NA"]
      }
    }else{
      bias <- "NA"
    }
  }else{bias <- tmp}
  biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
}

sterile.plot <- sterile.plot %>% 
  left_join(biaslist, by = c('gene' = 'gene')) 
names(sterile.plot)[c(7:8)] <- c("xbias","bias")

sterile.plot$bias.plot <- "NA"
for(i in 1:length(row.names(sterile.plot))){
  p1 <- sterile.plot$p1[i]
  p2 <- sterile.plot$p2[i]
  bias <- sterile.plot$bias[i]
  if(!bias=="NA"){
    if(bias=="pat"){if(p1>0.6&p2<0.4){sterile.plot[i,"bias.plot"]<- "pat"}}
    if(bias=="mat"){if(p1<0.4&p2>0.6){sterile.plot[i,"bias.plot"] <- "mat"}}
    if(bias=="EHB"){if(p1<0.4&p2<0.4){sterile.plot[i,"bias.plot"] <- "EHB"}}
    if(bias=="AHB"){if(p1>0.6&p2>0.6){sterile.plot[i,"bias.plot"] <- "AHB"}}
  }
}

sterile.plot0 <- sterile.plot
sterile.plot[!sterile.plot$gene%in%sterile.GLIMMIX.biased,"bias"] <- "NA" 
sterile.plot[!sterile.plot$gene%in%sterile.GLIMMIX.biased,"bias.plot"] <- "NA" 

sterile.plot <- rbind(sterile.plot[sterile.plot$bias.plot%in%c("NA"),],
                      sterile.plot[sterile.plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
sterile.plot$bias.plot <- factor(sterile.plot$bias.plot,
                                 levels = c("NA","mat", "AHB", "EHB", "pat"))
write.csv(sterile.plot,"sterile_PSGE.csv",row.names=F)

Generate plot for sterile F1s

g1 <- ggplot(sterile.plot, aes(x=p1, y=p2,
                               color=bias.plot,alpha=0.8)) + 
  geom_point(size=2) + theme_classic() +
  xlab(expression(paste("% A allele in ",B[mother],
                        " x ",A[father],sep=""))) +
  ylab(expression(paste("% A allele in ",A[mother],
                        " x ",B[father],sep=""))) +
  ggtitle("sterile workers") +
  theme(text = element_text(size=20),
        plot.title = element_text(hjust = 0.5)) +
  scale_color_manual(breaks = levels(sterile.plot$bias.plot)[-c(1)],
                     values=c("grey90", "#000000" ,"#c58791", "#53939e", "#56b4e9")) +
  guides(alpha=F, color=F)
g1

reproductive F1s


for(i in 1:length(row.names(reproductive.plot))){
  p <- reproductive.plot[i,"SK.padj"]
  p1 <- reproductive.plot[i,"p1"]
  p2 <- reproductive.plot[i,"p2"]
  if(p<0.05&p1>0.6&p2<0.4){reproductive.plot[i,"bias"] <- "pat"}
  if(p<0.05&p1<0.4&p2>0.6){reproductive.plot[i,"bias"] <- "mat"}
  if(p<0.05&p1<0.4&p2<0.4){reproductive.plot[i,"bias"] <- "EHB"}
  if(p<0.05&p1>0.6&p2>0.6){reproductive.plot[i,"bias"] <- "AHB"}
}

biaslist <- data.frame(matrix(ncol=2,nrow=0))
names(biaslist) <- c("gene","bias")
genelist <- unique(reproductive.plot$gene)

for(i in 1:length(genelist)){
  tmp <- unique(reproductive.plot[reproductive.plot$gene==genelist[i],"bias"])
  if(length(tmp)>1){
    if(length(tmp)==2){
      if(any(tmp%in%"NA")){
        bias <- tmp[!tmp%in%"NA"]
      }
    }else{
      bias <- "NA"
    }
  }else{bias <- tmp}
  biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
}

reproductive.plot <- reproductive.plot %>% 
  left_join(biaslist, by = c('gene' = 'gene')) 
names(reproductive.plot)[c(7:8)] <- c("xbias","bias")

reproductive.plot$bias.plot <- "NA"
for(i in 1:length(row.names(reproductive.plot))){
  p1 <- reproductive.plot$p1[i]
  p2 <- reproductive.plot$p2[i]
  bias <- reproductive.plot$bias[i]
  if(!bias=="NA"){
    if(bias=="pat"){if(p1>0.6&p2<0.4){reproductive.plot[i,"bias.plot"]<- "pat"}}
    if(bias=="mat"){if(p1<0.4&p2>0.6){reproductive.plot[i,"bias.plot"] <- "mat"}}
    if(bias=="EHB"){if(p1<0.4&p2<0.4){reproductive.plot[i,"bias.plot"] <- "EHB"}}
    if(bias=="AHB"){if(p1>0.6&p2>0.6){reproductive.plot[i,"bias.plot"] <- "AHB"}}
  }
}

reproductive.plot0 <- reproductive.plot
reproductive.plot[!reproductive.plot$gene%in%reproductive.GLIMMIX.biased,"bias"] <- "NA" 
reproductive.plot[!reproductive.plot$gene%in%reproductive.GLIMMIX.biased,"bias.plot"] <- "NA" 

reproductive.plot <- rbind(reproductive.plot[reproductive.plot$bias.plot%in%c("NA"),],
                      reproductive.plot[reproductive.plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
reproductive.plot$bias.plot <- factor(reproductive.plot$bias.plot,
                                 levels = c("NA","mat", "AHB", "EHB", "pat"))
write.csv(reproductive.plot,"reproductive_PSGE.csv",row.names=F)

Generate plot for reproductive F1s

g2 <- ggplot(reproductive.plot, aes(x=p1, y=p2,
                               color=bias.plot,alpha=0.8)) + 
  geom_point(size=2) + theme_classic() +
  xlab(expression(paste("% A allele in ",EHB[mother],
                        " x ",AHB[father],sep=""))) +
  ylab(expression(paste("% A allele in ",AHB[mother],
                        " x ",EHB[father],sep=""))) +
  ggtitle("reproductive workers") +
  theme(text = element_text(size=20),
        plot.title = element_text(hjust = 0.5)) +
  scale_color_manual(breaks = levels(reproductive.plot$bias.plot)[-c(1)],
                     values=c("grey90", "#000000" ,"#c58791", "#53939e", "#56b4e9")) +
  guides(alpha=F, color=F)

g2

Final plot


Make center tri-plot

gmid.df <- data.frame(sterile=c(length(unique(sterile.plot[sterile.plot$bias=="mat","gene"])),
                                     length(unique(sterile.plot[sterile.plot$bias=="AHB","gene"])),
                                     length(unique(sterile.plot[sterile.plot$bias=="EHB","gene"])),
                                     length(unique(sterile.plot[sterile.plot$bias=="pat","gene"]))),
                      Bias=c("mat","AHB","EHB","pat"),
                      reproductive=c(length(unique(reproductive.plot[reproductive.plot$bias=="mat","gene"])),
                                   length(unique(reproductive.plot[reproductive.plot$bias=="AHB","gene"])),
                                   length(unique(reproductive.plot[reproductive.plot$bias=="EHB","gene"])),
                                   length(unique(reproductive.plot[reproductive.plot$bias=="pat","gene"]))))
allgenes <- read.csv("NCBI.csv",header=F)[,c(1)]

## Test if # of sterile biased genes is different from reproductive biased genes
mat.test <- chisq.test(data.frame(Success=c(gmid.df[1,1],gmid.df[1,3]),
                                  Failure=c(length(allgenes)-gmid.df[1,1],
                                            length(allgenes)-gmid.df[1,3]),
                                  row.names=c("sterile","reproductive")))$p.value
AHB.test <- chisq.test(data.frame(Success=c(gmid.df[2,1],gmid.df[2,3]),
                                  Failure=c(length(allgenes)-gmid.df[2,1],
                                            length(allgenes)-gmid.df[2,3]),
                                  row.names=c("sterile","reproductive")))$p.value
## Warning in chisq.test(data.frame(Success = c(gmid.df[2, 1], gmid.df[2, 3]), :
## Chi-squared approximation may be incorrect
EHB.test <- chisq.test(data.frame(Success=c(gmid.df[3,1],gmid.df[3,3]),
                                  Failure=c(length(allgenes)-gmid.df[3,1],
                                            length(allgenes)-gmid.df[3,3]),
                                  row.names=c("sterile","reproductive")))$p.value
pat.test <- chisq.test(data.frame(Success=c(gmid.df[4,1],gmid.df[4,3]),
                                  Failure=c(length(allgenes)-gmid.df[4,1],
                                            length(allgenes)-gmid.df[4,3]),
                                  row.names=c("sterile","reproductive")))$p.value
## Build table
gmid.df$`.` <- c(mat.test,AHB.test,EHB.test,pat.test)

gmid.df <- gmid.df[,c(4,1,2,3)]
nsrows <- row.names(gmid.df[gmid.df$`.`>0.05,])
gmid.df$`.` <- formatC(gmid.df$`.`, format = "e", digits = 2)
gmid.df[nsrows,"."] <- "(ns)"
gmid.df <- gmid.df[,c(2,3,4,1)]

cols <- matrix("black", nrow(gmid.df), ncol(gmid.df))
cols[1,2] <- "#000000"
cols[2,2] <- "#c58791"
cols[3,2] <- "#53939e"
cols[4,2] <- "#56b4e9"

ccols <- matrix("white", nrow(gmid.df), ncol(gmid.df))
ccols[1,3] <- "#f4efea"
ccols[2,3] <- "#f4efea"
ccols[3,3] <- "#f4efea"
ccols[4,3] <- "#f4efea"
ccols[1,1] <- "#f4efea"
ccols[2,1] <- "#f4efea"
ccols[3,1] <- "#f4efea"
ccols[4,1] <- "#f4efea"
ccols[1,2] <- "#e4d8d1"
ccols[2,2] <- "#e4d8d1"
ccols[3,2] <- "#e4d8d1"
ccols[4,2] <- "#e4d8d1"

cfonts <- matrix("plain", nrow(gmid.df), ncol(gmid.df))
cfonts[1,2] <- "bold"
cfonts[2,2] <- "bold"
cfonts[3,2] <- "bold"
cfonts[4,2] <- "bold"

names(gmid.df) <- c("sterile","Bias","reproductive",".")
gmid.df[2,2] <- "AHB"
gmid.df[3,2] <- "EHB"

tt <- ttheme_default(core=list(fg_params = list(col = cols, 
                                                cex = 1,
                                                fontface = cfonts),
                               bg_params = list(col=NA,
                                                fill = ccols),
                               padding.h=unit(2, "mm")),
                     rowhead=list(bg_params = list(col=NA)),
                     colhead=list(bg_params = list(fill = c("#f4efea",
                                                            "#e4d8d1",
                                                            "#f4efea",
                                                            "white")),
                                  fg_params = list(rot=90,
                                                   cex = 1,
                                                   col = c("black",
                                                           "black",
                                                           "black",
                                                           "white"))))

gmid <- tableGrob(gmid.df, rows = NULL, theme=tt)

plot(gmid)

Join plots to generate the final plot

fig1 <- arrangeGrob(g1, gmid, g2, widths=c(5,2.5,5))
ggsave(file="fig1.png", plot=fig1, width=15, height=6)